{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Configuring pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# import numpy and pandas\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# used for dates\n",
    "import datetime\n",
    "from datetime import datetime, date\n",
    "\n",
    "# Set some pandas options controlling output format\n",
    "pd.set_option('display.notebook_repr_html', False)\n",
    "pd.set_option('display.max_columns', 7)\n",
    "pd.set_option('display.max_rows', 10)\n",
    "pd.set_option('display.width', 60)\n",
    "\n",
    "# read in the data and print the first five rows\n",
    "# use the Symbol column as the index, and \n",
    "# only read in columns in positions 0, 2, 3, 7\n",
    "sp500 = pd.read_csv(\"data/sp500.csv\", \n",
    "                    index_col='Symbol', \n",
    "                    usecols=[0, 2, 3, 7])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Renaming columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "             Sector   Price  BookValue\n",
       "Symbol                                \n",
       "MMM     Industrials  141.14     26.668\n",
       "ABT     Health Care   39.60     15.573"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# rename the Book Value column to not have a space\n",
    "# this returns a copy with the column renamed\n",
    "newSP500 = sp500.rename(columns=\n",
    "                        {'Book Value': 'BookValue'})\n",
    "# print first 2 rows\n",
    "newSP500[:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Sector', 'Price', 'Book Value'], dtype='object')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# verify the columns in the original did not change\n",
    "sp500.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Sector', 'Price', 'BookValue'], dtype='object')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# this changes the column in-place\n",
    "sp500.rename(columns=                  \n",
    "             {'Book Value': 'BookValue'},                   \n",
    "             inplace=True)\n",
    "# we can see the column is changed\n",
    "sp500.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Symbol\n",
       "MMM     26.668\n",
       "ABT     15.573\n",
       "ABBV     2.954\n",
       "ACN      8.326\n",
       "ACE     86.897\n",
       "Name: BookValue, dtype: float64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# and now we can use .BookValue\n",
    "sp500.BookValue[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Adding new columns with [] and .insert()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "             Sector   Price  BookValue  RoundedPrice\n",
       "Symbol                                              \n",
       "MMM     Industrials  141.14     26.668         141.0\n",
       "ABT     Health Care   39.60     15.573          40.0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# make a copy so that we keep the original data unchanged\n",
    "sp500_copy = sp500.copy()\n",
    "# add the new column\n",
    "sp500_copy['RoundedPrice'] = sp500.Price.round()\n",
    "sp500_copy[:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "             Sector  RoundedPrice   Price  BookValue\n",
       "Symbol                                              \n",
       "MMM     Industrials         141.0  141.14     26.668\n",
       "ABT     Health Care          40.0   39.60     15.573"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# make a copy so that we keep the original data unchanged\n",
    "copy = sp500.copy()\n",
    "# insert sp500.Price * 2 as the \n",
    "# second column in the DataFrame\n",
    "copy.insert(1, 'RoundedPrice', sp500.Price.round())\n",
    "copy[:2]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Adding columns through enlargement"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "             Sector   Price  BookValue  PER\n",
       "Symbol                                     \n",
       "MMM     Industrials  141.14     26.668    0\n",
       "ABT     Health Care   39.60     15.573    0\n",
       "ABBV    Health Care   53.95      2.954    0"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# copy of subset / slice\n",
    "ss = sp500[:3].copy()\n",
    "# add the new column initialized to 0\n",
    "ss.loc[:,'PER'] = 0\n",
    "# take a look at the results\n",
    "ss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "             Sector   Price  BookValue       PER\n",
       "Symbol                                          \n",
       "MMM     Industrials  141.14     26.668  0.469112\n",
       "ABT     Health Care   39.60     15.573 -0.282863\n",
       "ABBV    Health Care   53.95      2.954 -1.509059"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# copy of subset / slice\n",
    "ss = sp500[:3].copy()\n",
    "# add the new column initialized with random numbers\n",
    "np.random.seed(123456)\n",
    "ss.loc[:,'PER'] = pd.Series(np.random.normal(size=3), index=ss.index)\n",
    "# take a look at the results\n",
    "ss"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Adding columns using concatenation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "                        Sector   Price  BookValue  \\\n",
       "Symbol                                              \n",
       "MMM                Industrials  141.14     26.668   \n",
       "ABT                Health Care   39.60     15.573   \n",
       "ABBV               Health Care   53.95      2.954   \n",
       "ACN     Information Technology   79.79      8.326   \n",
       "ACE                 Financials  102.91     86.897   \n",
       "\n",
       "        RoundedPrice  \n",
       "Symbol                \n",
       "MMM            141.0  \n",
       "ABT             40.0  \n",
       "ABBV            54.0  \n",
       "ACN             80.0  \n",
       "ACE            103.0  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create a DataFrame with only the RoundedPrice column\n",
    "rounded_price = pd.DataFrame({'RoundedPrice':    \n",
    "                              sp500.Price.round()})\n",
    "# concatenate along the columns axis\n",
    "concatenated = pd.concat([sp500, rounded_price], axis=1)\n",
    "concatenated[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "        Price\n",
       "Symbol       \n",
       "MMM     141.0\n",
       "ABT      40.0\n",
       "ABBV     54.0\n",
       "ACN      80.0\n",
       "ACE     103.0"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create a DataFrame with only the RoundedPrice column\n",
    "rounded_price = pd.DataFrame({'Price': sp500.Price.round()})\n",
    "rounded_price[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "                        Sector   Price  BookValue  Price\n",
       "Symbol                                                  \n",
       "MMM                Industrials  141.14     26.668  141.0\n",
       "ABT                Health Care   39.60     15.573   40.0\n",
       "ABBV               Health Care   53.95      2.954   54.0\n",
       "ACN     Information Technology   79.79      8.326   80.0\n",
       "ACE                 Financials  102.91     86.897  103.0"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# this will result in duplicate Price columm\n",
    "dups = pd.concat([sp500, rounded_price], axis=1)\n",
    "dups[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "         Price  Price\n",
       "Symbol               \n",
       "MMM     141.14  141.0\n",
       "ABT      39.60   40.0\n",
       "ABBV     53.95   54.0\n",
       "ACN      79.79   80.0\n",
       "ACE     102.91  103.0"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# retrieves both Price columns\n",
    "dups.Price[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Reordering columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "        BookValue   Price                  Sector\n",
       "Symbol                                           \n",
       "MMM        26.668  141.14             Industrials\n",
       "ABT        15.573   39.60             Health Care\n",
       "ABBV        2.954   53.95             Health Care\n",
       "ACN         8.326   79.79  Information Technology\n",
       "ACE        86.897  102.91              Financials"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# return a new DataFrame with the columns reversed\n",
    "reversed_column_names = sp500.columns[::-1]\n",
    "sp500[reversed_column_names][:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Replacing the contents of a column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "                        Sector  Price  BookValue\n",
       "Symbol                                          \n",
       "MMM                Industrials  141.0     26.668\n",
       "ABT                Health Care   40.0     15.573\n",
       "ABBV               Health Care   54.0      2.954\n",
       "ACN     Information Technology   80.0      8.326\n",
       "ACE                 Financials  103.0     86.897"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# this occurs in-place so let's use a copy\n",
    "copy = sp500.copy()\n",
    "# replace the Price column data with the new values\n",
    "# instead of adding a new column\n",
    "copy.Price = rounded_price.Price\n",
    "copy[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "                        Sector  Price  BookValue\n",
       "Symbol                                          \n",
       "MMM                Industrials  141.0     26.668\n",
       "ABT                Health Care   40.0     15.573\n",
       "ABBV               Health Care   54.0      2.954\n",
       "ACN     Information Technology   80.0      8.326\n",
       "ACE                 Financials  103.0     86.897"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# this occurs in-place so let's use a copy\n",
    "copy = sp500.copy()\n",
    "# replace the Price column data wwith rounded values\n",
    "copy.loc[:,'Price'] = rounded_price.Price\n",
    "copy[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Deleting columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "             Sector   Price\n",
       "Symbol                     \n",
       "MMM     Industrials  141.14\n",
       "ABT     Health Care   39.60"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Example of using del to delete a column\n",
    "# make a copy as this is done in-place\n",
    "copy = sp500.copy()\n",
    "del copy['BookValue']\n",
    "copy[:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "         Price  BookValue\n",
       "Symbol                   \n",
       "MMM     141.14     26.668\n",
       "ABT      39.60     15.573"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Example of using pop to remove a column from a DataFrame\n",
    "# first make a copy of a subset of the data frame as\n",
    "# pop works in place\n",
    "copy = sp500.copy()\n",
    "# this will remove Sector and return it as a series\n",
    "popped = copy.pop('Sector')\n",
    "# Sector column removed in-place\n",
    "copy[:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Symbol\n",
       "MMM                Industrials\n",
       "ABT                Health Care\n",
       "ABBV               Health Care\n",
       "ACN     Information Technology\n",
       "ACE                 Financials\n",
       "Name: Sector, dtype: object"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# and we have the Sector column as the result of the pop\n",
    "popped[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "         Price  BookValue\n",
       "Symbol                   \n",
       "MMM     141.14     26.668\n",
       "ABT      39.60     15.573\n",
       "ABBV     53.95      2.954\n",
       "ACN      79.79      8.326\n",
       "ACE     102.91     86.897"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Example of using drop to remove a column \n",
    "# make a copy of a subset of the data frame\n",
    "copy = sp500.copy()\n",
    "# this will return a new DataFrame with 'Sector’ removed\n",
    "# the copy DataFrame is not modified\n",
    "afterdrop = copy.drop(['Sector'], axis = 1)\n",
    "afterdrop[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Appending rows from other DataFrame objects with .append()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "             Sector   Price  BookValue\n",
       "Symbol                                \n",
       "MMM     Industrials  141.14     26.668\n",
       "ABT     Health Care   39.60     15.573\n",
       "ABBV    Health Care   53.95      2.954\n",
       "A       Health Care   56.18     16.928\n",
       "GAS       Utilities   52.98     32.462\n",
       "ABBV    Health Care   53.95      2.954"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# copy the first three rows of sp500\n",
    "df1 = sp500.iloc[0:3].copy()\n",
    "# copy 10th and 11th rows\n",
    "df2 = sp500.iloc[[10, 11, 2]]\n",
    "# append df1 and df2\n",
    "appended = df1.append(df2)\n",
    "# the result is the rows of the first followed by \n",
    "# those of the second\n",
    "appended"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "        PER\n",
       "Symbol     \n",
       "MMM     0.0\n",
       "ABT     0.0\n",
       "ABBV    0.0"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# data frame using df1.index and just a PER column\n",
    "# also a good example of using a scalar value\n",
    "# to initialize multiple rows\n",
    "df3 = pd.DataFrame(0.0, \n",
    "                   index=df1.index,\n",
    "                   columns=['PER'])\n",
    "df3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "        BookValue  PER   Price       Sector\n",
       "Symbol                                     \n",
       "MMM        26.668  NaN  141.14  Industrials\n",
       "ABT        15.573  NaN   39.60  Health Care\n",
       "ABBV        2.954  NaN   53.95  Health Care\n",
       "MMM           NaN  0.0     NaN          NaN\n",
       "ABT           NaN  0.0     NaN          NaN\n",
       "ABBV          NaN  0.0     NaN          NaN"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# append df1 and df3\n",
    "# each has three rows, so 6 rows is the result\n",
    "# df1 had no PER column, so NaN from for those rows\n",
    "# df3 had no BookValue, Price or Sector, so NaN's\n",
    "df1.append(df3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "   BookValue  PER   Price       Sector\n",
       "0     26.668  NaN  141.14  Industrials\n",
       "1     15.573  NaN   39.60  Health Care\n",
       "2      2.954  NaN   53.95  Health Care\n",
       "3        NaN  0.0     NaN          NaN\n",
       "4        NaN  0.0     NaN          NaN\n",
       "5        NaN  0.0     NaN          NaN"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# ignore index labels, create default index\n",
    "df1.append(df3, ignore_index=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Concatenating rows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "             Sector   Price  BookValue\n",
       "Symbol                                \n",
       "MMM     Industrials  141.14     26.668\n",
       "ABT     Health Care   39.60     15.573\n",
       "ABBV    Health Care   53.95      2.954\n",
       "A       Health Care   56.18     16.928\n",
       "GAS       Utilities   52.98     32.462\n",
       "ABBV    Health Care   53.95      2.954"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# copy the first three rows of sp500\n",
    "df1 = sp500.iloc[0:3].copy()\n",
    "# copy 10th and 11th rows\n",
    "df2 = sp500.iloc[[10, 11, 2]]\n",
    "# pass them as a list\n",
    "pd.concat([df1, df2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "             Sector  Price  BookValue  Foo\n",
       "Symbol                                    \n",
       "A       Health Care  56.18     16.928    0\n",
       "GAS       Utilities  52.98     32.462    0\n",
       "ABBV    Health Care  53.95      2.954    0"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# copy df2\n",
    "df2_2 = df2.copy()\n",
    "# add a column to df2_2 that is not in df1\n",
    "df2_2.insert(3, 'Foo', pd.Series(0, index=df2.index))\n",
    "# see what it looks like\n",
    "df2_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "        BookValue  Foo   Price       Sector\n",
       "Symbol                                     \n",
       "MMM        26.668  NaN  141.14  Industrials\n",
       "ABT        15.573  NaN   39.60  Health Care\n",
       "ABBV        2.954  NaN   53.95  Health Care\n",
       "A          16.928  0.0   56.18  Health Care\n",
       "GAS        32.462  0.0   52.98    Utilities\n",
       "ABBV        2.954  0.0   53.95  Health Care"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# now concatenate\n",
    "pd.concat([df1, df2_2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "            BookValue  Foo   Price       Sector\n",
       "    Symbol                                     \n",
       "df1 MMM        26.668  NaN  141.14  Industrials\n",
       "    ABT        15.573  NaN   39.60  Health Care\n",
       "    ABBV        2.954  NaN   53.95  Health Care\n",
       "df2 A          16.928  0.0   56.18  Health Care\n",
       "    GAS        32.462  0.0   52.98    Utilities\n",
       "    ABBV        2.954  0.0   53.95  Health Care"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# specify keys\n",
    "r = pd.concat([df1, df2_2], keys=['df1', 'df2'])\n",
    "r"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Adding and replacing rows via setting with enlargement"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "             Sector   Price  BookValue\n",
       "Symbol                                \n",
       "MMM     Industrials  141.14     26.668\n",
       "ABT     Health Care   39.60     15.573\n",
       "ABBV    Health Care   53.95      2.954\n",
       "FOO      the sector  100.00    110.000"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# get a small subset of the sp500 \n",
    "# make sure to copy the slice to make a copy\n",
    "ss = sp500[:3].copy()\n",
    "# create a new row with index label FOO\n",
    "# and assign some values to the columns via a list\n",
    "ss.loc['FOO'] = ['the sector', 100, 110]\n",
    "ss"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Removing rows using .drop()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "                        Sector   Price  BookValue\n",
       "Symbol                                           \n",
       "MMM                Industrials  141.14     26.668\n",
       "ABT                Health Care   39.60     15.573\n",
       "ABBV               Health Care   53.95      2.954\n",
       "ACN     Information Technology   79.79      8.326\n",
       "ACE                 Financials  102.91     86.897"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# get a copy of the first 5 rows of sp500\n",
    "ss = sp500[:5]\n",
    "ss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "             Sector   Price  BookValue\n",
       "Symbol                                \n",
       "MMM     Industrials  141.14     26.668\n",
       "ABBV    Health Care   53.95      2.954\n",
       "ACE      Financials  102.91     86.897"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# drop rows with labels ABT and ACN\n",
    "afterdrop = ss.drop(['ABT', 'ACN'])\n",
    "afterdrop[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Removing rows using Boolean selection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(500, 10)"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# determine the rows where Price > 300\n",
    "selection = sp500.Price > 300\n",
    "# report number of rows and number that will be dropped\n",
    "(len(selection), selection.sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "                        Sector   Price  BookValue\n",
       "Symbol                                           \n",
       "MMM                Industrials  141.14     26.668\n",
       "ABT                Health Care   39.60     15.573\n",
       "ABBV               Health Care   53.95      2.954\n",
       "ACN     Information Technology   79.79      8.326\n",
       "ACE                 Financials  102.91     86.897\n",
       "...                        ...     ...        ...\n",
       "YHOO    Information Technology   35.02     12.768\n",
       "YUM     Consumer Discretionary   74.77      5.147\n",
       "ZMH                Health Care  101.84     37.181\n",
       "ZION                Financials   28.43     30.191\n",
       "ZTS                Health Care   30.53      2.150\n",
       "\n",
       "[490 rows x 3 columns]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# select the complement of the expression\n",
    "# note the use of the complement of the selection\n",
    "price_less_than_300 = sp500[~selection]\n",
    "price_less_than_300"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Removing rows using a slice"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "             Sector   Price  BookValue\n",
       "Symbol                                \n",
       "MMM     Industrials  141.14     26.668\n",
       "ABT     Health Care   39.60     15.573\n",
       "ABBV    Health Care   53.95      2.954"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# get only the first three rows\n",
    "only_first_three = sp500[:3]\n",
    "only_first_three"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "             Sector   Price  BookValue\n",
       "Symbol                                \n",
       "MMM     Industrials  141.14     26.668\n",
       "ABT     Health Care   39.60     15.573\n",
       "ABBV    Health Care   53.95      2.954"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# first three, but a copy of them\n",
    "only_first_three = sp500[:3].copy()\n",
    "only_first_three"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
